#!/pro/bin/perl

# csv2xlsx: Convert csv to xlsx
#	   (m)'20 [30 Nov 2020] Copyright H.M.Brand 2007-2021

use 5.14.0;
use warnings;

our $VERSION = "1.11";

sub usage {
    my $err = shift and select STDERR;
    print <<"EOU";
usage: csv2xlsx [-s <sep>] [-q <quot>] [-w <width>] [-d <dtfmt>]
               [-o <xlsx>] [file.csv]
       -s <sep>   use <sep>   as seperator char, auto-detect, default = ','
                  The string "tab" is allowed.
       -e <esc>   use <esc>   as escape    char, auto-detect, default = '"'
                  The string "undef" is allowed.
       -q <quot>  use <quot>  as quotation char,              default = '"'
                  The string "undef" will disable quotation.
       -w <width> use <width> as default minimum column width default = 4
       -o <xlsx>  write output to file named <xlsx>, defaults
                  to input file name with .csv replaced with .xlsx
                  if from standard input, defaults to csv2xlsx.xlsx
       -F         allow formula's. Otherwise fields starting with
                  an equal sign are forced to string
         --Fa=aaa Define formula action: none/die/croak/diag/empty/undef
         --Ft     Formula's will be stored as text (formula actions: none)
         --Fd     Formula's will cause a die
         --Fc     Formula's will cause a croak
         --FD     Formula's will cause a warning (this is the default)
         --Fe     Formula's will be replaced by the empty string
         --Fu     Formula's will be replaced with an undefined cell
       -f         force usage of <xlsx> if already exists (unlink before use)
       -d <dtfmt> use <dtfmt> as date formats.   Default = 'dd-mm-yyyy'
       -C <C:fmt> use <fmt> as currency formats for currency <C>, no default
       -D cols    only convert dates in columns <cols>. Default is everywhere.
       -L N       limit export to N rows
       -u         CSV is UTF8
         --de     Some CSV fields might be double-encoded. Try to fix that.
       -m         merge multiple CSV's into a single xlsx (separate sheets)
		    -o is required, all arguments should be existing files
       -v [<lvl>] verbosity (default = 1)
EOU
    exit $err;
    } # usage

use Getopt::Long qw(:config bundling passthrough);
my $quo = '"';
my $esc = '"';
my $wdt = 4;		# Default minimal column width
my $fac = "diag";	# Formula action (default is warn only)
my $dtf = "dd-mm-yyyy";	# Date format
my $crf = "";		# Currency format, e.g.: $:### ### ##0.00
my $opt_v = 1;
my $dtc;

GetOptions (
    "help|?"		=> sub { usage (0); },
    "V|version"		=> sub { say $0 =~ s{.*/}{}r, " [$VERSION]"; exit 0; },

    "c|s|sep=s"		=> \my $sep, # Set after reading first line in attempt to auto-detect
    "q|quo=s"		=> \$quo,
    "e|esc=s"		=> \$esc,
    "w|width=i"		=> \$wdt,
    "o|x|out=s"		=> \my $xls,
    "d|date-fmt=s"	=> \$dtf,
    "D|date-col=s"	=> \$dtc,
    "C|curr-fmt=s"	=> \$crf,
    "f|force!"		=> \my $frc,
    "F|formulas!"	=> \my $frm,
      "Fa=s"		=> \$fac,
      "Ft"		=> sub { $fac = "none";		},
      "Fd"		=> sub { $fac = "die";		},
      "Fc"		=> sub { $fac = "croak";	},
      "FD"		=> sub { $fac = "diag";		},
      "Fe"		=> sub { $fac = "empty";	},
      "Fu"		=> sub { $fac = "undef";	},
    "u|utf-8|utf8!"	=> \my $utf,
      "de|fix-utf8!"	=> \my $dutf, # double encoded? \x{c3}\x{ab} => \x{100}
    "m|merge!"		=> \my $mrg,
    "L|row-limit=i"	=> \my $row_limit,
    "v|verbose:1"	=> \$opt_v,
    ) or usage (1);

if ($mrg) {
    my @csv;
    for (@ARGV) {
	if (m/\.xlsx?$/i) {
	    $xls and usage (1);
	    $xls = $_;
	    next;
	    }
	if (m/\.(?:csv|png|jpe?g|bmp|gif|tiff|xpm)$/i && -s) {
	    push @csv, $_;
	    next;
	    }
	warn "Argument $_ is not an existing (CSV) file\n";
	usage (1);
	}
    $xls && @csv or usage (1);
    @ARGV = @csv;
    }

my $base = @ARGV && -f $ARGV[0] ? $ARGV[0] : "csv2xlsx";
$xls ||= $base =~ s/(?:\.csv)?$/.xlsx/ir;

-s $xls && $frc and unlink $xls;
if (-s $xls) {
    print STDERR "File '$xls' already exists. Overwrite? [y/N] > N\b";
    scalar <STDIN> =~ m/^[yj](?:es|a)?$/i or exit;
    }
# Test if the file can be created
# The error from Excel::Writer::XLSX is not very informative if it fails
# e.g. when the folder cannot be used for whatever reason
{   open my $fh, ">", $xls or die "$xls: $!\n";
    close $fh;
    unlink $xls;
    }

# Don't split ourselves when modules do it _much_ better, and follow the standards
use Text::CSV_XS;
use Date::Calc qw( Delta_Days Days_in_Month );
use Excel::Writer::XLSX;
use Encode qw( from_to );

if ($dutf) {
    eval { require Encode::DoubleEncodedUTF8; };
    if ($@) {
	$dutf = 0;
	warn "Cannot load Encode::DoubleEncodedUTF8; --de ignored\n";
	}
    }

my $wbk = Excel::Writer::XLSX->new ($xls);
   $dtf =~ s/j/y/g;
my %fmt = (
    date => $wbk->add_format (align => "center", num_format => $dtf),
    rest => $wbk->add_format (align => "left"),
    wrap => $wbk->add_format (text_wrap => 1),
    );
$crf =~ s/^([^:]+):(.*)/$1/ and $fmt{currency} = $wbk->add_format (
    num_format	=> "$1 $2",
    align	=> "right",
    );

my @args = @ARGV ? @ARGV : ("");
foreach my $csvf (@args) {
    my $sheetname = $csvf =~ s{\.\w+$}{}ir =~ s{.*/}{}r || "Sheet 1";
    ($_ = length $sheetname) > 31 and substr $sheetname, 31, $_ - 31, "";
    my $wks = $wbk->add_worksheet ($sheetname);
    $utf && !$wks->can ("write_unicode") and $utf = 0;

    if ($csvf =~ m/\.(png|jpe?g|bmp|gif|tiff|xpm)$/i) {
	$wks->insert_image (1, 1, $csvf);
	next;
	}

    my ($h, $w, @w) = (0, 1); # data height, -width, and default column widths
    my $row;
    my $firstline;
    my $fh;
    if (-f $csvf) {
	$opt_v and say "Reading $csvf";
	open $fh, "<", $csvf or die "$csvf: $!\n";
	}
    else {
	$opt_v and say "Reading STDIN";
	$fh = *ARGV;
	}
    my $Sep = $sep;
    unless ($Sep) { # No sep char passed, try to auto-detect;
	while (<$fh>) {
	    m/\S/ or next;	# Skip empty leading blank lines
	    $Sep = # start auto-detect with quoted strings
		   m/["\d];["\d;]/    ? ";"  :
		   m/["\d],["\d,]/    ? ","  :
		   m/["\d]\t["\d,]/   ? "\t" :
		   # If neither, then for unquoted strings
		   m/\w;[\w;]/        ? ";"  :
		   m/\w,[\w,]/        ? ","  :
		   m/\w\t[\w,]/       ? "\t" :
		   # And pipes (lowest prio)
		   m/["\d]\|["\d,]/   ? "|"  :
		   m/\w\|[\w,]/       ? "|"  :
		   m/,/  && !m/[;\t]/ ? ","  :
		   m/;/  && !m/[,\t]/ ? ";"  :
		   m/\t/ && !m/[,;]/  ? "\t" :
					","  ;
	    $firstline = $_;
	    last;
	    }
	$firstline or die "The sourcefile does not contain any usable data\n";
	}
    my $csv = Text::CSV_XS-> new ({
	sep_char       => $Sep eq "tab"   ? "\t"  : $Sep,
	quote_char     => $quo eq "undef" ? undef : $quo,
	escape_char    => $esc eq "undef" ? undef : $esc,
	binary         => 1,
	keep_meta_info => 1,
	auto_diag      => 1,
	formula        => $fac,
	});
    if ($firstline) {
	$csv->parse ($firstline) or die $csv->error_diag ();
	$row = [ $csv->fields ];
	}
    if ($opt_v > 3) {
	foreach my $k (qw( sep_char quote_char escape_char )) {
	    my $c = $csv->$k () || "undef";
	    $c =~ s/\t/\\t/g;
	    $c =~ s/\r/\\r/g;
	    $c =~ s/\n/\\n/g;
	    $c =~ s/\0/\\0/g;
	    $c =~ s/([\x00-\x1f\x7f-\xff])/sprintf"\\x{%02x}",ord$1/ge;
	    printf STDERR "%-11s = %s\n", $k, $c;
	    }
	}

    if (my $rows = $dtc) {
	$rows =~ s/-$/-999/;			# 3,6-
	$rows =~ s/-/../g;
	eval "\$dtc = { map { \$_ => 1 } $rows }";
	}

    while ($row && @$row or $row = $csv->getline ($fh)) {
	$row_limit and $csv->record_number > $row_limit and last;
	my @row = @$row;
	@row > $w and push @w, ($wdt) x (($w = @row) - @w);
	foreach my $c (0 .. $#row) {
	    my $val = $row[$c] // "";
	    my $l = length $val;
	    $l > ($w[$c] // -1) and $w[$c] = $l;

	    $dutf and $csv->is_binary ($c) and utf8::valid ($val) and
		$val = Encode::decode ("utf-8-de", $val);

	    if ($utf and $csv->is_binary ($c)) {
		from_to ($val, "utf-8", "ucs2");
		$wks->write_unicode ($h, $c, $val);
		next;
		}

	    if ($csv->is_quoted ($c)) {
		$val =~ s/\r\n/\n/g;
		if ($utf) {
		    from_to ($val, "utf-8", "ucs2");
		    $val =~ m/\n/
			? $wks->write_unicode ($h, $c, $val, $fmt{wrap})
			: $wks->write_unicode ($h, $c, $val);
		    }
		else {
		    $val =~ m/\n/
			? $wks->write_string  ($h, $c, $val, $fmt{wrap})
			: $wks->write_string  ($h, $c, $val);
		    }
		next;
		}

	    if (!$dtc or $dtc->{$c + 1}) {
		my @d = (0, 0, 0);	# Y, M, D
		$val =~ m/^(\d{4})(\d{2})(\d{2})$/   and @d = ($1, $2, $3);
		$val =~ m/^(\d{2})-(\d{2})-(\d{4})$/ and @d = ($3, $2, $1);
		if ( $d[2] >=    1 && $d[2] <=   31 &&
		     $d[1] >=    1 && $d[1] <=   12 &&
		     $d[0] >= 1900 && $d[0] <= 2199) {
		    my $dm = Days_in_Month (@d[0,1]);
		    $d[2] <   1 and $d[2] = 1;
		    $d[2] > $dm and $d[2] = $dm;
		    my $dt = 2 + Delta_Days (1900, 1, 1, @d);
		    $wks->write ($h, $c, $dt, $fmt{date});
		    next;
		    }
		}
	    if ($crf and $val =~ m/^\s*\Q$crf\E\s*([0-9.]+)$/) {
		$wks->write ($h, $c, $1 + 0, $fmt{currency});
		next;
		}

	    if (!$frm && $val =~ m/^=/) {
		$wks->write_string  ($h, $c, $val);
		}
	    else {
		$wks->write ($h, $c, $val);
		}
	    }
	++$h % 100 or $opt_v && printf STDERR "%6d x %6d\r", $w, $h;
	} continue { $row = undef }
    close $fh;
    $opt_v && printf STDERR "%6d x %6d\n", $w, $h;

    $wks->set_column ($_, $_, $w[$_]) for 0 .. $#w;
    }
$opt_v and say "Writing $xls";
$wbk->close ();
